{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example complicated lactam analysis.\n",
    "\n",
    "The input cores are generic in atoms and bonds, the system figures out the correct cores for grouping purposes.\n",
    "\n",
    "This may be overkill (and it certainly takes long enough to run).  The algorithm first maps the cores onto one another and then tries to optimize the side-chain selection across all sets, i.e. it tries to make the matching choices such that all R1's are the same, all R2's are the same, and so on.\n",
    "\n",
    "**n.b.** *It is important that the cores are given in order from most to least specific, because RGroupDecomposition takes the first matching core.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem import AllChem\n",
    "from rdkit.Chem.Draw import IPythonConsole\n",
    "IPythonConsole.ipython_useSVG=True\n",
    "from rdkit.Chem.rdRGroupDecomposition import RGroupDecomposition\n",
    "import pandas as pd\n",
    "from rdkit.Chem import PandasTools\n",
    "from collections import OrderedDict\n",
    "from IPython.display import HTML\n",
    "from rdkit import rdBase\n",
    "rdBase.DisableLog(\"rdApp.debug\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two generic scaffolds: '*' matches any atom and '~' any bond, so the ring\n",
    "# atoms/bonds are left open.  The keys name the size of the ring fused to the\n",
    "# four-membered ring.  Insertion order is preserved by the OrderedDict.\n",
    "cores = OrderedDict([\n",
    "    ('six', Chem.MolFromSmarts('[#8]-[#6](=O)-*~1~*~*~*~*~2-[#6]-[#6](=O)-*~1~2')),\n",
    "    ('five', Chem.MolFromSmarts('[#8]-[#6](=O)-*~1~*~*~*~2-[#6]-[#6](=O)-*~1~2')),\n",
    "])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:rdkit=\"http://www.rdkit.org/xml\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" baseProfile=\"full\" xml:space=\"preserve\" width=\"600px\" height=\"200px\" viewBox=\"0 0 600 200\">\n",
       "<!-- END OF HEADER -->\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"600\" height=\"200\" x=\"0\" y=\"0\"> </rect>\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"600\" height=\"200\" x=\"0\" y=\"0\"> </rect>\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"600\" height=\"200\" x=\"0\" y=\"0\"> </rect>\n",
       "<path class=\"bond-0\" d=\"M 18.7415,92.5686 L 32.7712,99.7103\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0\" d=\"M 32.7712,99.7103 L 46.8009,106.852\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 42.9947,107.053 L 43.8302,122.878\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 43.8302,122.878 L 44.6656,138.704\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 50.6071,106.651 L 51.4425,122.477\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 51.4425,122.477 L 52.278,138.302\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2\" d=\"M 46.8009,106.852 L 62.469,96.6685\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2\" d=\"M 62.469,96.6685 L 78.1371,86.485\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3\" d=\"M 78.4187,79.6396 L 76.7623,48.2619\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-11\" d=\"M 79.5524,86.485 L 109.242,101.598\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-4\" d=\"M 80.256,45.7401 L 108.247,27.5476\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-5\" d=\"M 109.501,27.6522 L 139.19,42.7653\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-6\" d=\"M 142.696,44.943 L 144.344,76.1695\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-7\" d=\"M 144.946,83.0048 L 155.201,98.7817\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-7\" d=\"M 155.201,98.7817 L 165.455,114.559\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-12\" d=\"M 144.062,83.0048 L 116.214,101.105\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-8\" d=\"M 165.455,114.559 L 133.497,135.33\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9\" d=\"M 129.768,134.539 L 126.493,149.976\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9\" d=\"M 126.493,149.976 L 123.218,165.413\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9\" d=\"M 137.225,136.121 L 133.95,151.558\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9\" d=\"M 133.95,151.558 L 130.675,166.995\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-10\" d=\"M 133.497,135.33 L 123.243,119.553\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-10\" d=\"M 123.243,119.553 L 112.989,103.776\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"atom-0\" d=\"M 7.8788 89.5917 Q 7.8788 86.9999, 9.15946 85.5516 Q 10.4401 84.1032, 12.8337 84.1032 Q 15.2273 84.1032, 16.508 85.5516 Q 17.7887 86.9999, 17.7887 89.5917 Q 17.7887 92.214, 16.4928 93.7081 Q 15.1968 95.187, 12.8337 95.187 Q 10.4554 95.187, 9.15946 93.7081 Q 7.8788 92.2293, 7.8788 89.5917 M 12.8337 93.9673 Q 14.4803 93.9673, 15.3646 92.8696 Q 16.2641 91.7567, 16.2641 89.5917 Q 16.2641 87.4725, 15.3646 86.4053 Q 14.4803 85.3229, 12.8337 85.3229 Q 11.1872 85.3229, 10.2877 86.3901 Q 9.40339 87.4573, 9.40339 89.5917 Q 9.40339 91.7719, 10.2877 92.8696 Q 11.1872 93.9673, 12.8337 93.9673 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-2\" d=\"M 43.8553 144.944 Q 43.8553 142.353, 45.1359 140.904 Q 46.4166 139.456, 48.8102 139.456 Q 51.2038 139.456, 52.4845 140.904 Q 53.7651 142.353, 53.7651 144.944 Q 53.7651 147.567, 52.4692 149.061 Q 51.1733 150.54, 48.8102 150.54 Q 46.4318 150.54, 45.1359 149.061 Q 43.8553 147.582, 43.8553 144.944 M 48.8102 149.32 Q 50.4568 149.32, 51.341 148.222 Q 52.2405 147.109, 52.2405 144.944 Q 52.2405 142.825, 51.341 141.758 Q 50.4568 140.676, 48.8102 140.676 Q 47.1636 140.676, 46.2641 141.743 Q 45.3799 142.81, 45.3799 144.944 Q 45.3799 147.125, 46.2641 148.222 Q 47.1636 149.32, 48.8102 149.32 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-3\" d=\"M 76.7234 84.8766 L 78.309 83.2605 L 76.2051 82.8336 L 76.5252 81.7512 L 78.4615 82.7269 L 78.187 80.6077 L 79.3305 80.5925 L 79.0103 82.7117 L 80.977 81.7969 L 81.3125 82.8336 L 79.178 83.23 L 80.7026 84.8613 L 79.7726 85.5322 L 78.7206 83.5197 L 77.6534 85.5322 L 76.7234 84.8766 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-4\" d=\"M 74.7141 46.8148 L 76.2997 45.1987 L 74.1958 44.7718 L 74.5159 43.6893 L 76.4522 44.6651 L 76.1777 42.5459 L 77.3212 42.5306 L 77.001 44.6498 L 78.9677 43.7351 L 79.3031 44.7718 L 77.1687 45.1682 L 78.6933 46.7995 L 77.7633 47.4703 L 76.7113 45.4579 L 75.6441 47.4703 L 74.7141 46.8148 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-5\" d=\"M 106.672 26.0437 L 108.258 24.4277 L 106.154 24.0008 L 106.474 22.9183 L 108.41 23.894 L 108.136 21.7749 L 109.279 21.7596 L 108.959 23.8788 L 110.926 22.964 L 111.261 24.0008 L 109.127 24.3972 L 110.651 26.0285 L 109.721 26.6993 L 108.669 24.6868 L 107.602 26.6993 L 106.672 26.0437 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-6\" d=\"M 140.639 43.3345 L 142.225 41.7185 L 140.121 41.2916 L 140.441 40.2091 L 142.377 41.1849 L 142.103 39.0657 L 143.246 39.0504 L 142.926 41.1696 L 144.893 40.2549 L 145.228 41.2916 L 143.094 41.688 L 144.618 43.3193 L 143.688 43.9901 L 142.636 41.9776 L 141.569 43.9901 L 140.639 43.3345 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-7\" d=\"M 142.648 81.3964 L 144.234 79.7803 L 142.13 79.3534 L 142.45 78.2709 L 144.386 79.2467 L 144.112 77.1275 L 145.256 77.1123 L 144.935 79.2314 L 146.902 78.3167 L 147.237 79.3534 L 145.103 79.7498 L 146.628 81.3811 L 145.698 82.0519 L 144.646 80.0395 L 143.578 82.0519 L 142.648 81.3964 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-10\" d=\"M 120.632 172.645 Q 120.632 170.053, 121.912 168.605 Q 123.193 167.157, 125.587 167.157 Q 127.98 167.157, 129.261 168.605 Q 130.542 170.053, 130.542 172.645 Q 130.542 175.267, 129.246 176.762 Q 127.95 178.24, 125.587 178.24 Q 123.208 178.24, 121.912 176.762 Q 120.632 175.283, 120.632 172.645 M 125.587 177.021 Q 127.233 177.021, 128.117 175.923 Q 129.017 174.81, 129.017 172.645 Q 129.017 170.526, 128.117 169.459 Q 127.233 168.376, 125.587 168.376 Q 123.94 168.376, 123.041 169.443 Q 122.156 170.511, 122.156 172.645 Q 122.156 174.825, 123.041 175.923 Q 123.94 177.021, 125.587 177.021 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-11\" d=\"M 110.691 102.167 L 112.276 100.551 L 110.172 100.124 L 110.492 99.042 L 112.429 100.018 L 112.154 97.8985 L 113.298 97.8833 L 112.977 100.002 L 114.944 99.0877 L 115.28 100.124 L 113.145 100.521 L 114.67 102.152 L 113.74 102.823 L 112.688 100.811 L 111.621 102.823 L 110.691 102.167 \" fill=\"#191919\"/>\n",
       "<path class=\"bond-0\" d=\"M 347.467,142.275 L 349.614,126.599\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0\" d=\"M 349.614,126.599 L 351.761,110.924\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 353.201,114.453 L 367.891,108.457\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 367.891,108.457 L 382.582,102.461\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 350.32,107.395 L 365.011,101.399\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1\" d=\"M 365.011,101.399 L 379.701,95.4034\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2\" d=\"M 351.761,110.924 L 336.962,99.4458\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2\" d=\"M 336.962,99.4458 L 322.164,87.9679\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3\" d=\"M 321.845,81.1225 L 322.831,49.7107\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-10\" d=\"M 320.253,87.9679 L 288.513,97.1905\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-4\" d=\"M 319.333,48.2055 L 287.841,36.8689\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-5\" d=\"M 286.664,36.9621 L 267.111,62.1711\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-6\" d=\"M 263.023,67.0796 L 247.558,77.5899\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6\" d=\"M 247.558,77.5899 L 232.094,88.1003\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-11\" d=\"M 263.892,67.0796 L 281.548,93.0574\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:2,6\"/>\n",
       "<path class=\"bond-7\" d=\"M 232.094,88.1003 L 253.519,119.624\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8\" d=\"M 249.775,118.91 L 246.816,134.424\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8\" d=\"M 246.816,134.424 L 243.857,149.939\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8\" d=\"M 257.263,120.338 L 254.304,135.852\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8\" d=\"M 254.304,135.852 L 251.345,151.367\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9\" d=\"M 253.519,119.624 L 268.983,109.113\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9\" d=\"M 268.983,109.113 L 284.448,98.6028\" style=\"fill:none;fill-rule:evenodd;stroke:#191919;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"atom-0\" d=\"M 341.634 148.717 Q 341.634 146.125, 342.915 144.676 Q 344.196 143.228, 346.589 143.228 Q 348.983 143.228, 350.263 144.676 Q 351.544 146.125, 351.544 148.717 Q 351.544 151.339, 350.248 152.833 Q 348.952 154.312, 346.589 154.312 Q 344.211 154.312, 342.915 152.833 Q 341.634 151.354, 341.634 148.717 M 346.589 153.092 Q 348.236 153.092, 349.12 151.994 Q 350.02 150.881, 350.02 148.717 Q 350.02 146.597, 349.12 145.53 Q 348.236 144.448, 346.589 144.448 Q 344.943 144.448, 344.043 145.515 Q 343.159 146.582, 343.159 148.717 Q 343.159 150.897, 344.043 151.994 Q 344.943 153.092, 346.589 153.092 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-2\" d=\"M 382.095 96.5516 Q 382.095 93.9598, 383.375 92.5114 Q 384.656 91.0631, 387.05 91.0631 Q 389.443 91.0631, 390.724 92.5114 Q 392.004 93.9598, 392.004 96.5516 Q 392.004 99.1739, 390.709 100.668 Q 389.413 102.147, 387.05 102.147 Q 384.671 102.147, 383.375 100.668 Q 382.095 99.1892, 382.095 96.5516 M 387.05 100.927 Q 388.696 100.927, 389.58 99.8295 Q 390.48 98.7165, 390.48 96.5516 Q 390.48 94.4324, 389.58 93.3652 Q 388.696 92.2828, 387.05 92.2828 Q 385.403 92.2828, 384.503 93.35 Q 383.619 94.4172, 383.619 96.5516 Q 383.619 98.7318, 384.503 99.8295 Q 385.403 100.927, 387.05 100.927 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-3\" d=\"M 319.608 86.3594 L 321.193 84.7434 L 319.09 84.3165 L 319.41 83.234 L 321.346 84.2098 L 321.071 82.0906 L 322.215 82.0753 L 321.895 84.1945 L 323.861 83.2798 L 324.197 84.3165 L 322.062 84.7129 L 323.587 86.3442 L 322.657 87.015 L 321.605 85.0026 L 320.538 87.015 L 319.608 86.3594 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-4\" d=\"M 320.804 48.2634 L 322.389 46.6473 L 320.285 46.2204 L 320.606 45.138 L 322.542 46.1137 L 322.267 43.9945 L 323.411 43.9793 L 323.091 46.0985 L 325.057 45.1837 L 325.393 46.2204 L 323.258 46.6168 L 324.783 48.2481 L 323.853 48.9189 L 322.801 46.9065 L 321.734 48.9189 L 320.804 48.2634 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-5\" d=\"M 284.942 35.3537 L 286.527 33.7376 L 284.423 33.3107 L 284.744 32.2283 L 286.68 33.204 L 286.405 31.0848 L 287.549 31.0696 L 287.229 33.1888 L 289.195 32.274 L 289.531 33.3107 L 287.396 33.7071 L 288.921 35.3384 L 287.991 36.0092 L 286.939 33.9968 L 285.872 36.0092 L 284.942 35.3537 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-6\" d=\"M 261.582 65.4711 L 263.168 63.855 L 261.064 63.4282 L 261.384 62.3457 L 263.32 63.3214 L 263.046 61.2022 L 264.189 61.187 L 263.869 63.3062 L 265.836 62.3914 L 266.171 63.4282 L 264.037 63.8245 L 265.561 65.4559 L 264.631 66.1267 L 263.579 64.1142 L 262.512 66.1267 L 261.582 65.4711 \" fill=\"#191919\"/>\n",
       "<path class=\"atom-9\" d=\"M 241.423 157.094 Q 241.423 154.502, 242.704 153.054 Q 243.985 151.606, 246.378 151.606 Q 248.772 151.606, 250.052 153.054 Q 251.333 154.502, 251.333 157.094 Q 251.333 159.716, 250.037 161.21 Q 248.741 162.689, 246.378 162.689 Q 244 162.689, 242.704 161.21 Q 241.423 159.732, 241.423 157.094 M 246.378 161.47 Q 248.025 161.47, 248.909 160.372 Q 249.808 159.259, 249.808 157.094 Q 249.808 154.975, 248.909 153.908 Q 248.025 152.825, 246.378 152.825 Q 244.732 152.825, 243.832 153.892 Q 242.948 154.96, 242.948 157.094 Q 242.948 159.274, 243.832 160.372 Q 244.732 161.47, 246.378 161.47 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-10\" d=\"M 283.007 96.9944 L 284.592 95.3783 L 282.488 94.9514 L 282.809 93.869 L 284.745 94.8447 L 284.47 92.7255 L 285.614 92.7103 L 285.294 94.8295 L 287.26 93.9147 L 287.596 94.9514 L 285.461 95.3478 L 286.986 96.9792 L 286.056 97.65 L 285.004 95.6375 L 283.937 97.65 L 283.007 96.9944 \" fill=\"#191919\"/>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from rdkit.Chem import Draw\n",
    "\n",
    "# Draw the query cores side by side, in the order they were defined.\n",
    "core_mols = list(cores.values())\n",
    "Draw.MolsToGridImage(core_mols)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To use RGroupDecomposition:\n",
    "  \n",
    "  1. Construct the decomposition object from the core(s): `rg = RGroupDecomposition(core)`\n",
    "  2. Call rg.Add( mol ) on the molecules.  If this returns -1, the molecule is not\n",
    "     compatible with the core\n",
    "  3. After all molecules are added, call rg.Process() to complete the rgroup\n",
    "     decomposition."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "RDKit ERROR: [20:18:22] SMILES Parse Error: syntax error while parsing: CANONICAL_SMILES\n",
      "RDKit ERROR: [20:18:22] SMILES Parse Error: Failed parsing SMILES 'CANONICAL_SMILES' for input: 'CANONICAL_SMILES'\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Added 1000\n",
      "Added 2000\n",
      "Added 3000\n",
      "Added 4000\n",
      "Added 2309 to RGroup Decomposition out of 4340\n"
     ]
    }
   ],
   "source": [
    "rg = RGroupDecomposition(cores.values())\n",
    "\n",
    "count = 0   # molecules whose SMILES parsed successfully\n",
    "added = 0   # molecules that matched a core and were accepted by rg.Add\n",
    "\n",
    "# Context manager so the file handle is closed even if a line fails.\n",
    "with open(\"compounds.txt\") as infile:\n",
    "    for line in infile:\n",
    "        fields = line.split()\n",
    "        if not fields:\n",
    "            # Blank line: indexing [-1] would raise IndexError.\n",
    "            continue\n",
    "        # The SMILES is read from the last whitespace-separated column.\n",
    "        m = Chem.MolFromSmiles(fields[-1])\n",
    "        if m is None:\n",
    "            # Unparseable entry (e.g. a column-header row) -- skip it.\n",
    "            continue\n",
    "        count += 1\n",
    "        # rg.Add returns -1 when no core matches, otherwise the 0-based index\n",
    "        # of the stored molecule.  Count the successes instead of reporting\n",
    "        # the last index, which under-reports by one.\n",
    "        if rg.Add(m) >= 0:\n",
    "            added += 1\n",
    "        if count % 1000 == 0:\n",
    "            print(\"Added\", count)\n",
    "\n",
    "print(\"Added %s to RGroup Decomposition out of %s\" % (added, count))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Complete the decomposition for all molecules added so far; the return\n",
    "# value of the call is displayed as the cell output.\n",
    "rg.Process()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Rendering this many molecules is pretty taxing for the current system, so this takes a while.\n",
    "\n",
    "It is useful to disable logging here.  When making RGroup renderings there \n",
    "are a lot of sanitization warnings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rdkit import rdBase\n",
    "# The first cell only disabled \"rdApp.debug\"; this pattern is broader.\n",
    "# NOTE(review): presumably \"rdApp.*\" silences every RDKit log channel -- confirm.\n",
    "rdBase.DisableLog(\"rdApp.*\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The RGroupDecomposition code is quite compatible with the python pandas integration.\n",
    "The result of rg.GetRGroupsAsColumns() can be passed directly to a pandas DataFrame.\n",
    "\n",
    "**n.b.** You need to call PandasTools.ChangeMoleculeRendering(frame) to allow the molecules\n",
    "to be rendered properly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column-oriented R-group results go straight into a DataFrame.\n",
    "rgroup_columns = rg.GetRGroupsAsColumns()\n",
    "frame = pd.DataFrame(rgroup_columns)\n",
    "\n",
    "# Switch the frame to molecule-aware rendering.\n",
    "PandasTools.ChangeMoleculeRendering(frame)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Just show the first few (for speed and to keep the notebook small)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Core</th>\n",
       "      <th>R1</th>\n",
       "      <th>R5</th>\n",
       "      <th>R6</th>\n",
       "      <th>R7</th>\n",
       "      <th>R8</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "      <td><img data-content=\"rdkit/molecule\" src=\"\" alt=\"Mol\"/></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview only the leading rows of the full table.\n",
    "first_rows = frame.head()\n",
    "f2 = pd.DataFrame(first_rows)\n",
    "PandasTools.ChangeMoleculeRendering(f2)\n",
    "\n",
    "# Convert to HTML explicitly and display it as the cell result.\n",
    "preview_html = f2.to_html()\n",
    "HTML(preview_html)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (conda rdkit_build)",
   "language": "python",
   "name": "rdkit_build"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
